doctra 0.1.1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- doctra/__init__.py +21 -18
- doctra/cli/main.py +5 -2
- doctra/cli/utils.py +12 -3
- doctra/engines/layout/paddle_layout.py +13 -78
- doctra/engines/vlm/provider.py +86 -58
- doctra/engines/vlm/service.py +10 -14
- doctra/exporters/html_writer.py +1235 -0
- doctra/parsers/structured_pdf_parser.py +35 -15
- doctra/parsers/table_chart_extractor.py +66 -28
- doctra/ui/__init__.py +5 -0
- doctra/ui/app.py +1012 -0
- doctra/utils/progress.py +428 -0
- doctra/utils/structured_utils.py +49 -49
- doctra/version.py +1 -1
- {doctra-0.1.1.dist-info → doctra-0.3.0.dist-info}/METADATA +45 -6
- {doctra-0.1.1.dist-info → doctra-0.3.0.dist-info}/RECORD +19 -15
- {doctra-0.1.1.dist-info → doctra-0.3.0.dist-info}/WHEEL +0 -0
- {doctra-0.1.1.dist-info → doctra-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {doctra-0.1.1.dist-info → doctra-0.3.0.dist-info}/top_level.txt +0 -0
doctra/__init__.py
CHANGED
@@ -1,19 +1,22 @@
 """
 Doctra - Document Parsing Library
 Parse, extract, and analyze documents with ease
 """

 from .parsers.structured_pdf_parser import StructuredPDFParser
 from .parsers.table_chart_extractor import ChartTablePDFParser
 from .version import __version__
+from .ui import build_demo, launch_ui

-[old lines 10-18 not recoverable in this view]
+__all__ = [
+    'StructuredPDFParser',
+    'ChartTablePDFParser',
+    'build_demo',
+    'launch_ui',
+    '__version__'
+]
+
+# Package metadata
+__author__ = 'Adem Boukhris'
+__email__ = 'boukhrisadam98@gmail.com'  # Replace with your email
 __description__ = 'Parse, extract, and analyze documents with ease'
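Version 0.3.0 re-exports the new UI entry points at the package root. A minimal usage sketch, assuming doctra 0.3.0 is installed; calling `launch_ui()` with no arguments is an assumption here, since the function's signature is not shown in this diff:

```python
# Sketch of the 0.3.0 top-level API as exported by doctra/__init__.py.
from doctra import StructuredPDFParser, launch_ui

parser = StructuredPDFParser()          # full-document parsing pipeline
parser.parse("/abs/path/document.pdf")  # same parse(...) call used by the CLI below

launch_ui()  # assumed no-arg launcher for the new UI in doctra/ui/app.py
```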
doctra/cli/main.py
CHANGED
@@ -259,6 +259,7 @@ def parse(pdf_path: Path, output_dir: Optional[Path], use_vlm: bool,
         click.echo(f"📄 Processing: {pdf_path.name}")
         parser.parse(str(pdf_path.absolute()))
         click.echo("✅ Full document processing completed successfully!")
+        click.echo(f"📁 Output directory: {output_dir.absolute() if output_dir else 'outputs/'}")

     except KeyboardInterrupt:
         click.echo("\n⚠️ Processing interrupted by user", err=True)
@@ -444,6 +445,7 @@ def tables(pdf_path: Path, output_dir: Path, use_vlm: bool, vlm_provider: str,
         click.echo(f"📄 Processing: {pdf_path.name}")
         parser.parse(str(pdf_path), str(output_dir))
         click.echo("✅ Table extraction completed successfully!")
+        click.echo(f"📁 Output directory: {output_dir.absolute()}")

     except KeyboardInterrupt:
         click.echo("\n⚠️ Extraction interrupted by user", err=True)
@@ -522,6 +524,7 @@ def both(pdf_path: Path, output_dir: Path, use_vlm: bool, vlm_provider: str,
         click.echo(f"📄 Processing: {pdf_path.name}")
         parser.parse(str(pdf_path), str(output_dir))
         click.echo("✅ Chart and table extraction completed successfully!")
+        click.echo(f"📁 Output directory: {output_dir.absolute()}")

     except KeyboardInterrupt:
         click.echo("\n⚠️ Extraction interrupted by user", err=True)
@@ -818,8 +821,8 @@ def info():

     # VLM providers
     click.echo("\nVLM Providers:")
-    click.echo(" • Gemini (Google) - gemini-
-    click.echo(" • OpenAI - gpt-
+    click.echo(" • Gemini (Google) - gemini-2.5-pro, gemini-2.5-flash, gemini-2.5-flash-lite, gemini-2.0-flash")
+    click.echo(" • OpenAI - gpt-5, gpt-5-mini, gpt-4.1, gpt-4.1-mini, gpt-4o")

     # Available layout models
     click.echo("\nLayout Detection Models:")
doctra/cli/utils.py
CHANGED
@@ -263,7 +263,7 @@ def create_progress_callback(description: str, total: int):
     """
     Create a progress callback function for use with processing operations.

-    Creates a tqdm progress bar and returns a callback function that
+    Creates a beautiful tqdm progress bar and returns a callback function that
     can be used to update the progress during long-running operations.

    :param description: Description text for the progress bar
@@ -271,9 +271,18 @@ def create_progress_callback(description: str, total: int):
    :return: Callable progress callback function that takes an integer
        representing the number of completed items
    """
-    [old line not recoverable in this view]
+    import sys
+    from doctra.utils.progress import create_beautiful_progress_bar, create_notebook_friendly_bar

-    [old line not recoverable in this view]
+    # Enhanced environment detection
+    is_notebook = "ipykernel" in sys.modules or "jupyter" in sys.modules
+    is_terminal = hasattr(sys.stdout, 'isatty') and sys.stdout.isatty()
+
+    # Choose appropriate progress bar based on environment
+    if is_notebook:
+        pbar = create_notebook_friendly_bar(total=total, desc=description)
+    else:
+        pbar = create_beautiful_progress_bar(total=total, desc=description, leave=True)

     def callback(completed: int):
         pbar.n = completed
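The callback drives the bar by absolute count (`pbar.n = completed`) rather than incrementing, so repeated or out-of-order updates stay consistent. A standalone sketch of the same pattern, using plain tqdm as a stand-in for Doctra's `create_beautiful_progress_bar` and `create_notebook_friendly_bar` helpers:

```python
from tqdm.auto import tqdm  # tqdm.auto itself picks a notebook-friendly bar

def make_progress_callback(description: str, total: int):
    """Return a callback(completed) that drives a bar by absolute count."""
    pbar = tqdm(total=total, desc=description, leave=True)

    def callback(completed: int) -> None:
        pbar.n = completed  # jump to the absolute completed count
        pbar.refresh()      # redraw now rather than on the next implicit tick
        if completed >= total:
            pbar.close()

    return callback

# Hypothetical usage: a page loop reporting absolute progress.
report = make_progress_callback("Processing pages", total=5)
for done in range(1, 6):
    report(done)
```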
doctra/engines/layout/paddle_layout.py
CHANGED
@@ -4,16 +4,15 @@ import os
 import sys
 import json
 import tempfile
-import logging
 from dataclasses import dataclass, asdict
 from typing import Dict, List, Any, Tuple, Optional
-from tqdm import tqdm

 from PIL import Image
 from paddleocr import LayoutDetection  # pip install paddleocr>=2.7.0.3
 from doctra.utils.pdf_io import render_pdf_to_images
 from doctra.engines.layout.layout_models import LayoutBox, LayoutPage
-from doctra.utils.
+from doctra.utils.progress import create_loading_bar
+import warnings


 class PaddleLayoutEngine:
@@ -39,7 +38,7 @@ class PaddleLayoutEngine:
             (default: "PP-DocLayout_plus-L")
         """
         self.model_name = model_name
-        self.model: Optional[LayoutDetection] = None
+        self.model: Optional["LayoutDetection"] = None
@@ -53,80 +52,16 @@ class PaddleLayoutEngine:
         if self.model is not None:
             return

-        [old lines 56-65 truncated in this view: comments and the silent_init helper wrapping the saved tqdm originals]
-            original_tqdm_init(self, *args, **kwargs)
-
-        def silent_update(self, *args, **kwargs):
-            pass  # Do nothing
-
-        def silent_close(self, *args, **kwargs):
-            pass  # Do nothing
-
-        # More comprehensive output suppression
-        # Save original logging levels
-        original_levels = {}
-        loggers_to_silence = ['ppocr', 'paddle', 'PIL', 'urllib3', 'requests']
-        for logger_name in loggers_to_silence:
-            logger = logging.getLogger(logger_name)
-            original_levels[logger_name] = logger.level
-            logger.setLevel(logging.CRITICAL)
-
-        # Also try to silence the root logger temporarily
-        root_logger = logging.getLogger()
-        original_root_level = root_logger.level
-        root_logger.setLevel(logging.CRITICAL)
-
-        # Set environment variables that might help silence PaddlePaddle
-        old_env = {}
-        env_vars_to_set = {
-            'FLAGS_print_model_stats': '0',
-            'FLAGS_enable_parallel_graph': '0',
-            'GLOG_v': '4',  # Only show fatal errors
-            'GLOG_logtostderr': '0',
-            'GLOG_alsologtostderr': '0'
-        }
-
-        for key, value in env_vars_to_set.items():
-            old_env[key] = os.environ.get(key)
-            os.environ[key] = value
-
-        try:
-            # Monkey patch tqdm
-            tqdm.__init__ = silent_init
-            tqdm.update = silent_update
-            tqdm.close = silent_close
-
-            # Silence Paddle's download/init noise with enhanced suppression
-            with suppress_output():
-                self.model = LayoutDetection(model_name=self.model_name)
-
-        finally:
-            # Restore tqdm methods
-            tqdm.__init__ = original_tqdm_init
-            tqdm.update = original_tqdm_update
-            tqdm.close = original_tqdm_close
-
-            # Restore logging levels
-            for logger_name, level in original_levels.items():
-                logging.getLogger(logger_name).setLevel(level)
-            root_logger.setLevel(original_root_level)
-
-            # Restore environment variables
-            for key, old_value in old_env.items():
-                if old_value is None:
-                    os.environ.pop(key, None)
-                else:
-                    os.environ[key] = old_value
-
+        # Beautiful loading progress bar (no logging suppression)
+        with create_loading_bar(f'Loading PaddleOCR layout model: "{self.model_name}"') as bar:
+            # Suppress specific paddle extension warning: "No ccache found"
+            with warnings.catch_warnings():
+                warnings.filterwarnings(
+                    "ignore",
+                    message=r"No ccache found.*",
+                    category=UserWarning,
+                )
+                self.model = LayoutDetection(model_name=self.model_name)
             bar.update(1)

     def predict_pdf(
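The rewrite trades process-global patching (tqdm methods, logger levels, GLOG environment variables) for a scoped filter that silences one known-noisy warning. The pattern in isolation; `load_model` here is a hypothetical stand-in for the `LayoutDetection(model_name=...)` constructor:

```python
import warnings

def load_quietly(load_model):
    """Run load_model() with one specific UserWarning silenced."""
    with warnings.catch_warnings():
        # The filter applies only inside this block and is undone on exit,
        # unlike the removed code, which mutated tqdm, loggers, and os.environ.
        warnings.filterwarnings(
            "ignore",
            message=r"No ccache found.*",  # regex matched against the warning text
            category=UserWarning,
        )
        return load_model()
```

Because `warnings.catch_warnings()` restores the filter list automatically, none of the try/finally restoration bookkeeping the old block carried is needed.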
doctra/engines/vlm/provider.py
CHANGED
@@ -1,58 +1,86 @@
 from __future__ import annotations

 # --- keep these imports to match your snippet style ---
 import io
 import PIL
 import openai
 import outlines
 from pydantic import BaseModel
 from google.genai import Client
 from outlines.inputs import Image
-[old lines 11-58 not recoverable in this view: the previous make_model docstring and implementation]
+from anthropic import Anthropic
+# ------------------------------------------------------
+
+def make_model(
+    vlm_provider: str | None = "gemini",
+    vlm_model: str | None = None,
+    *,
+    api_key: str | None = None,
+):
+    """
+    Build a callable Outlines model for VLM processing.
+
+    Creates an Outlines model instance configured for Gemini, OpenAI, Anthropic, or OpenRouter
+    providers. Only one backend is active at a time, with Gemini as the default.
+
+    :param vlm_provider: VLM provider to use ("gemini", "openai", "anthropic", or "openrouter", default: "gemini")
+    :param vlm_model: Model name to use (defaults to provider-specific defaults)
+    :param api_key: API key for the VLM provider (required for all providers)
+    :return: Configured Outlines model instance
+    :raises ValueError: If provider is unsupported or API key is missing
+    """
+    vlm_provider = (vlm_provider or "gemini").lower()
+
+    # Set default models if not provided
+    if vlm_model is None:
+        if vlm_provider == "gemini":
+            vlm_model = "gemini-2.5-pro"
+        elif vlm_provider == "openai":
+            vlm_model = "gpt-5"
+        elif vlm_provider == "anthropic":
+            vlm_model = "claude-opus-4-1"
+        elif vlm_provider == "openrouter":
+            vlm_model = "x-ai/grok-4"
+
+    if vlm_provider == "gemini":
+        if not api_key:
+            raise ValueError("Gemini provider requires api_key to be passed to make_model(...).")
+        # Create the model (exactly like your snippet)
+        return outlines.from_gemini(
+            Client(api_key=api_key),
+            vlm_model,
+        )
+
+    if vlm_provider == "openai":
+        if not api_key:
+            raise ValueError("OpenAI provider requires api_key to be passed to make_model(...).")
+        # this part is for the openai models (exactly like your snippet)
+        return outlines.from_openai(
+            openai.OpenAI(api_key=api_key),
+            vlm_model,
+        )
+
+    if vlm_provider == "anthropic":
+        if not api_key:
+            raise ValueError("Anthropic provider requires api_key to be passed to make_model(...).")
+        # Create the Anthropic client and model (exactly like your snippet)
+        client = Anthropic(api_key=api_key)
+        return outlines.from_anthropic(
+            client,
+            vlm_model,
+        )
+
+    if vlm_provider == "openrouter":
+        if not api_key:
+            raise ValueError("OpenRouter provider requires api_key to be passed to make_model(...).")
+        # Create the OpenAI-compatible OpenRouter client and model (exactly like your snippet)
+        client = openai.OpenAI(
+            base_url="https://openrouter.ai/api/v1",
+            api_key=api_key,
+        )
+        return outlines.from_openai(
+            client,
+            vlm_model
+        )
+
+    raise ValueError(f"Unsupported provider: {vlm_provider}. Use 'gemini', 'openai', 'anthropic', or 'openrouter'.")
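Hypothetical calls against the new `make_model` (the key values are placeholders):

```python
from doctra.engines.vlm.provider import make_model

gemini = make_model(api_key="...")               # provider defaults to gemini-2.5-pro
claude = make_model("anthropic", api_key="...")  # defaults to claude-opus-4-1
grok = make_model("openrouter", api_key="...")   # x-ai/grok-4 via the OpenRouter base_url
```

Note that OpenRouter reuses `outlines.from_openai` pointed at a custom `base_url`, so no separate Outlines adapter is required.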
doctra/engines/vlm/service.py
CHANGED
@@ -15,9 +15,12 @@ class VLMStructuredExtractor:
     from images using Vision Language Models (VLM) with Outlines for type safety.

     Usage:
-        vlm = VLMStructuredExtractor(vlm_provider="gemini", api_key="YOUR_KEY"
+        vlm = VLMStructuredExtractor(vlm_provider="gemini", api_key="YOUR_KEY")
         chart = vlm.extract_chart("/abs/path/chart.jpg")
         table = vlm.extract_table("/abs/path/table.jpg")
+
+        # Or with Anthropic:
+        vlm = VLMStructuredExtractor(vlm_provider="anthropic", api_key="YOUR_KEY")
     """

     def __init__(
@@ -26,25 +29,21 @@ class VLMStructuredExtractor:
         vlm_model: str | None = None,
         *,
         api_key: str | None = None,
-        debug: bool = True,
     ):
         """
         Initialize the VLMStructuredExtractor with provider configuration.

-        Sets up the VLM model
-        from images.
+        Sets up the VLM model for structured data extraction from images.

-        :param vlm_provider: VLM provider to use ("gemini" or "
+        :param vlm_provider: VLM provider to use ("gemini", "openai", "anthropic", or "openrouter", default: "gemini")
         :param vlm_model: Model name to use (defaults to provider-specific defaults)
-        :param api_key: API key for the VLM provider (required for
-        :param debug: Whether to enable debug output for error handling (default: True)
+        :param api_key: API key for the VLM provider (required for all providers)
         """
         self.model = make_model(
             vlm_provider,
             vlm_model,
             api_key=api_key,
         )
-        self.debug = debug

     def _call(self, prompt_text: str, image_path: str, schema):
         """
@@ -68,13 +67,10 @@ class VLMStructuredExtractor:
                 img = img.convert("RGB")

             prompt = [prompt_text, Image(img)]
-            [old line not recoverable in this view]
+            result = self.model(prompt, schema)
+
+            return result
         except Exception as e:
-            if self.debug:
-                import traceback
-                print(f"[VLM ERROR] while processing: {image_path}")
-                traceback.print_exc()
-                print(f"[VLM ERROR] type={type(e).__name__} msg={e}")
             # Re-raise so caller can handle/log too
             raise
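With `debug` removed, `_call` now re-raises without printing, so error reporting belongs to the caller. A usage sketch mirroring the updated docstring; the key and image paths are placeholders:

```python
from doctra.engines.vlm.service import VLMStructuredExtractor

vlm = VLMStructuredExtractor(vlm_provider="anthropic", api_key="YOUR_KEY")
try:
    chart = vlm.extract_chart("/abs/path/chart.jpg")  # calls from the class docstring above
    table = vlm.extract_table("/abs/path/table.jpg")
except Exception as e:
    # 0.3.0 no longer prints tracebacks inside _call; handle or log here.
    print(f"VLM extraction failed: {type(e).__name__}: {e}")
```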